- Notifications
You must be signed in to change notification settings - Fork 849
/
Copy pathOML4SQL Data Cleaning Missing Data.dsnb
executable file
·1 lines (1 loc) · 10.7 KB
/
OML4SQL Data Cleaning Missing Data.dsnb
1
[{"layout":null,"template":null,"templateConfig":null,"name":"OML4SQL Data Cleaning Missing Data","description":null,"readOnly":false,"type":"low","paragraphs":[{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":null,"title":null,"message":["%md"," "],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","# OML4SQL Data Cleaning: fill in missing data","","In this notebook, we demonstrate how to replace missing values using Oracle SQL and the DBMS_DATA_MINING_TRANSFORM package.","","We use the customer insurance lifetime value data set, which contains customer financial information, lifetime value, and whether or not the customer bought insurance.","","The dataset `CUSTOMER_INSURANCE_LTV_SQL` is generated by the `\"OML Run-me-first\"` notebook, which `MUST` be run before this notebook.","","---","","###### IMPORTANT: The `\"OML Run-me-first\"` notebook is available under the menu `Templates -> Examples` and is a pre-requisite to the current notebook.","","---","","**Note**: when building or applying a model using in-database Oracle Machine Learning algorithms, this operation may *not* be needed separately if automatic data preparation is enabled. Automatic data preparation automatically replaces missing values of numerical attributes with the mean and missing values of categorical attributes with the mode. For example, see <a href=\"https://docs.oracle.com/en/database/oracle/machine-learning/oml4sql/23/dmcon/generalized-linear-model.html#GUID-19B8E133-0029-4892-88BB-3E1C9E83EB12\" onclick=\"return ! window.open('https://docs.oracle.com/en/database/oracle/machine-learning/oml4sql/23/dmcon/generalized-linear-model.html#GUID-19B8E133-0029-4892-88BB-3E1C9E83EB12');\"> Data Preparation for GLM <\/a>","","Copyright (c) 2024 Oracle Corporation ","###### <a href=\"https://oss.oracle.com/licenses/upl/\" onclick=\"return ! window.open('https://oss.oracle.com/licenses/upl/');\">The Universal Permissive License (UPL), Version 1.0<\/a>","---"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"For more information ...","message":["%md","","* <a href=\"https://docs.oracle.com/en/cloud/paas/autonomous-data-warehouse-cloud/index.html\" target=\"_blank\">Oracle ADB Documentation<\/a>","* <a href=\"https://github.com/oracle-samples/oracle-db-examples/tree/main/machine-learning\" target=\"_blank\">OML folder on Oracle GitHub<\/a>","* <a href=\"https://www.oracle.com/machine-learning\" target=\"_blank\">OML Web Page<\/a>","* <a href=\"https://docs.oracle.com/en/database/oracle/oracle-database/23/arpls/DBMS_DATA_MINING_TRANSFORM.html\" target=\"_blank\">OML4SQL DBMS_DATA_MINNG_TRANSFORM Documentation<\/a>"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Display CUSTOMER_INSURANCE_LTV_SQL data","message":["%sql","","SELECT *","FROM CUSTOMER_INSURANCE_LTV_SQL","FETCH FIRST 10 ROWS ONLY;"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Select rows where columns MARITAL_STATUS, CREDIT_BALANCE, BANK_FUNDS, INCOME contain missing values","message":["%sql","","SELECT CUSTOMER_ID, MARITAL_STATUS, CREDIT_BALANCE, BANK_FUNDS, INCOME","FROM CUSTOMER_INSURANCE_LTV_SQL","FETCH FIRST 10 ROWS ONLY;"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Create a view to include the columns to be treated","message":["%sql","","CREATE OR REPLACE VIEW CUSTOMER_MISSING_V AS","SELECT *","FROM CUSTOMER_INSURANCE_LTV_SQL;"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Compute mode value of each categorical variable","message":["%script","","BEGIN"," EXECUTE IMMEDIATE 'DROP TABLE missc_tbl';"," EXCEPTION WHEN OTHERS THEN NULL;","END;","/","","BEGIN"," DBMS_DATA_MINING_TRANSFORM.CREATE_MISS_CAT("," MISS_TABLE_NAME => 'MISSC_TBL');"," DBMS_DATA_MINING_TRANSFORM.INSERT_MISS_CAT_MODE("," MISS_TABLE_NAME => 'MISSC_TBL',"," DATA_TABLE_NAME => 'CUSTOMER_MISSING_V');","END;","/"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Table MISSC_TBL contains the mode of each categorical variable","message":["%sql","","SELECT COL, VAL","FROM MISSC_TBL"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Create view CUSTOMER_INSURANCE_LTV_SQL_STACK with missing categorical columns replaced by mode","message":["%script","","BEGIN"," EXECUTE IMMEDIATE 'DROP VIEW CUSTOMER_INSURANCE_LTV_SQL_STACK';"," EXCEPTION WHEN OTHERS THEN NULL;","END;","/","DECLARE"," xforms dbms_data_mining_transform.TRANSFORM_LIST;","BEGIN"," DBMS_DATA_MINING_TRANSFORM.STACK_MISS_CAT ("," MISS_TABLE_NAME => 'MISSC_TBL',"," XFORM_LIST => xforms);"," DBMS_DATA_MINING_TRANSFORM.XFORM_STACK ("," XFORM_LIST => xforms,"," DATA_TABLE_NAME => 'CUSTOMER_INSURANCE_LTV_SQL',"," XFORM_VIEW_NAME => 'CUSTOMER_INSURANCE_LTV_SQL_STACK');","END;"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"View categorical columns with missing values replaced","message":["%sql","","SELECT CUSTOMER_ID, MARITAL_STATUS","FROM CUSTOMER_INSURANCE_LTV_SQL_STACK","FETCH FIRST 10 ROWS ONLY;"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Create a table with the mean for each numeric variable","message":["%script","","BEGIN"," EXECUTE IMMEDIATE 'DROP TABLE miss_num_xtbl';"," EXCEPTION WHEN OTHERS THEN NULL;","END;","/","","BEGIN"," DBMS_DATA_MINING_TRANSFORM.CREATE_MISS_NUM("," MISS_TABLE_NAME => 'MISS_NUM_XTBL');"," DBMS_DATA_MINING_TRANSFORM.INSERT_MISS_NUM_MEAN("," MISS_TABLE_NAME => 'miss_num_xtbl',"," DATA_TABLE_NAME => 'CUSTOMER_MISSING_V',"," EXCLUDE_LIST => DBMS_DATA_MINING_TRANSFORM.COLUMN_LIST('CUSTOMER_ID'));","END;","/"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Table MISS_NUM_XTBL contains the mean of each numeric variable","message":["%sql","","SELECT * ","FROM MISS_NUM_XTBL","FETCH FIRST 10 ROWS ONLY;"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Create view CUSTOMER_INSURANCE_LTV_SQL_STACK with numeric missing values replaced with mean","message":["%script","","BEGIN"," EXECUTE IMMEDIATE 'DROP VIEW CUSTOMER_INSURANCE_LTV_SQL_STACK';"," EXCEPTION WHEN OTHERS THEN NULL;","END;","/","DECLARE"," xforms dbms_data_mining_transform.TRANSFORM_LIST;","BEGIN"," dbms_data_mining_transform.STACK_MISS_NUM ("," MISS_TABLE_NAME => 'MISS_NUM_XTBL',"," XFORM_LIST => xforms);"," dbms_data_mining_transform.XFORM_STACK ("," XFORM_LIST => xforms,"," DATA_TABLE_NAME => 'CUSTOMER_INSURANCE_LTV_SQL',"," XFORM_VIEW_NAME => 'CUSTOMER_INSURANCE_LTV_SQL_STACK');","END;","/"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"View numeric columns with missing values replaced","message":["%sql","","SELECT CUSTOMER_ID, CREDIT_BALANCE, BANK_FUNDS, INCOME","FROM CUSTOMER_INSURANCE_LTV_SQL_STACK","FETCH FIRST 10 ROWS ONLY;"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","## End of Script"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"}],"version":"6","snapshot":false,"tags":null}]